# ==============================================================================
# 2. STADIUM GEOMETRY DATABASE
# ==============================================================================
stadium_dims <- tibble(
team = c("ARI","ATL","BAL","BOS","CHC","CWS","CIN","CLE","COL","DET",
"HOU","KC","LAA","LAD","MIA","MIL","MIN","NYM","NYY","OAK",
"PHI","PIT","SD","SEA","SF","STL","TB","TEX","TOR","WSH"),
# Left, Left-Center, Center, Right-Center, Right
d_lf = c(330, 335, 333, 310, 355, 330, 328, 325, 347, 342,
315, 330, 330, 330, 344, 344, 339, 335, 318, 330,
329, 325, 334, 331, 339, 336, 315, 329, 328, 336),
d_lc = c(374, 385, 384, 379, 375, 375, 379, 370, 390, 370,
362, 385, 387, 375, 386, 371, 377, 370, 399, 375,
374, 383, 375, 378, 399, 375, 370, 372, 375, 377),
d_cf = c(407, 400, 400, 390, 400, 400, 404, 400, 415, 420,
409, 410, 396, 395, 407, 400, 404, 408, 408, 400,
401, 399, 396, 401, 391, 400, 404, 408, 400, 402),
d_rc = c(374, 375, 373, 420, 375, 375, 370, 375, 375, 365,
373, 385, 370, 375, 392, 374, 367, 370, 385, 375,
369, 375, 375, 381, 415, 375, 370, 374, 375, 370),
d_rf = c(330, 325, 318, 302, 353, 335, 325, 325, 350, 330,
326, 330, 330, 330, 335, 345, 328, 330, 314, 330,
330, 320, 322, 326, 309, 335, 322, 326, 328, 335)
)
get_stadium_poly <- function(team_code) {
s <- stadium_dims %>% filter(team == team_code)
if(nrow(s) == 0) return(NULL)
ref <- data.frame(
angle = c(-45, -22.5, 0, 22.5, 45),
dist = c(s$d_lf, s$d_lc, s$d_cf, s$d_rc, s$d_rf)
) %>% mutate(rad=angle*pi/180, x=dist*sin(rad), y=dist*cos(rad))
return(as.data.frame(spline(ref$x, ref$y, n = 100)))
}
# ==============================================================================
# 3. CLEANING & MODELING
# ==============================================================================
clean_data <- statcast_data %>%
# Filter for relevant events
filter(
events %in% c("field_out", "single", "double", "triple", "home_run", "sac_fly"),
!bb_type %in% c("ground_ball", "popup"), # Remove obvious infield plays
!is.na(hit_distance_sc), !is.na(launch_speed), !is.na(launch_angle)
) %>%
# Optional: Downsample if plot crashes (currently set to 100% data)
sample_frac(1) %>%
mutate(
fielder_raw = str_extract(des, "(?<=fielder\\s)[^.,]+"),
fielder_name = str_trim(fielder_raw),
is_caught = ifelse(events %in% c("field_out", "sac_fly"), 1, 0),
spray_angle = atan((hc_x - 125.42) / (198.27 - hc_y)) * 180 / pi,
raw_x = (hc_x - 125.42) * 2.5,
raw_y = (198.27 - hc_y) * 2.5,
angle_deg = atan2(raw_x, raw_y) * 180 / pi
)
# Global Catch Model
catch_model <- glm(is_caught ~ launch_speed + launch_angle + hit_distance_sc + abs(spray_angle),
data = clean_data, family = "binomial")
clean_data$predicted_prob <- predict(catch_model, clean_data, type = "response")
# ==============================================================================
# 4. BUILDING THE PLOT (WITH SNAPPING)
# ==============================================================================
teams_with_data <- sort(unique(clean_data$home_team))
teams_with_geom <- unique(stadium_dims$team)
valid_teams <- intersect(teams_with_data, teams_with_geom)
if(length(valid_teams) == 0) stop("No matching teams found in data.")
fig <- plot_ly()
infield <- data.frame(x = c(0, 63, 0, -63, 0), y = c(0, 63, 126, 63, 0))
visibility_list <- list()
for(i in seq_along(valid_teams)) {
t <- valid_teams[i]
# A. Geometry
s_geom <- stadium_dims %>% filter(team == t)
poly_t <- get_stadium_poly(t)
# Fence Lookup Function
ref_pts <- data.frame(
angle = c(-45, -22.5, 0, 22.5, 45),
dist = c(s_geom$d_lf, s_geom$d_lc, s_geom$d_cf, s_geom$d_rc, s_geom$d_rf)
)
get_fence_dist <- approxfun(ref_pts$angle, ref_pts$dist, rule = 2)
# B. Data Processing (Team Specific)
data_t <- clean_data %>%
filter(home_team == t) %>%
mutate(
catch_prob = ifelse(events == "home_run", 0, predicted_prob),
# 1. Snap Foul Balls
is_fair_hit = events %in% c("single", "double", "triple"),
fixed_angle = case_when(
is_fair_hit & angle_deg > 45 ~ 44,
is_fair_hit & angle_deg < -45 ~ -44,
TRUE ~ angle_deg
),
# 2. Snap Fake Homers
current_dist = sqrt(raw_x^2 + raw_y^2),
max_dist = get_fence_dist(fixed_angle),
final_dist = case_when(
events != "home_run" & current_dist >= max_dist ~ max_dist - 5,
TRUE ~ current_dist
),
coord_x = final_dist * sin(fixed_angle * pi / 180),
coord_y = final_dist * cos(fixed_angle * pi / 180),
# 3. Hover Info
display_fielder = ifelse(is_caught == 1 & !is.na(fielder_name), fielder_name, "N/A"),
hover_txt = paste0(
"<b>Batter:</b> ", player_name, "<br>",
"<b>Event:</b> ", events, "<br>",
"<b>Caught By:</b> ", display_fielder, "<br>",
"------------------<br>",
"<b>Exit Velo:</b> ", round(launch_speed, 1), " mph<br>",
"<b>Launch Angle:</b> ", round(launch_angle, 1), "°<br>",
"<b>Distance:</b> ", round(hit_distance_sc, 0), " ft<br>",
"<b>Catch Prob:</b> ", round(catch_prob * 100, 1), "%"
)
) %>%
# Filter out routine short fly balls (< 90% catch prob) to keep plot performant
filter(coord_y > 50, catch_prob < 0.90)
if(nrow(data_t) == 0) data_t <- data.frame(coord_x=0, coord_y=0, hover_txt="", catch_prob=0)[0,]
is_visible <- (i == 1)
# C. Add Traces
# Trace 1: Grass
fig <- fig %>% add_polygons(
x = c(0, poly_t$x, 0), y = c(0, poly_t$y, 0),
fillcolor = "#35682d", opacity = 0.8, line = list(color = "white", width = 2),
name = paste(t, "Field"), visible = is_visible, hoverinfo = "skip"
)
# Trace 2: Infield
fig <- fig %>% add_polygons(
data = infield, x = ~x, y = ~y,
fillcolor = "#8b4513", line = list(color = "black"),
name = "Infield", visible = is_visible, hoverinfo = "skip"
)
# Trace 3: Data Points
fig <- fig %>% add_markers(
x = data_t$coord_x,
y = data_t$coord_y,
text = data_t$hover_txt,
hoverinfo = "text",
marker = list(
size = 5, # Slightly smaller dots for dense data
line = list(color = "white", width = 0.2),
color = data_t$catch_prob,
colorscale = list(c(0, "green"), c(0.5, "yellow"), c(1, "red")),
cmin = 0, cmax = 1,
colorbar = list(title = "Catch Prob")
),
name = paste(t, "Balls"), visible = is_visible
)
vis_vec <- rep(FALSE, length(valid_teams) * 3)
vis_vec[((i-1)*3 + 1):((i-1)*3 + 3)] <- TRUE
visibility_list[[i]] <- list(method = "restyle", args = list("visible", vis_vec), label = t)
}
fig <- fig %>% layout(
title = "2024 MLB Fly Balls (Full Season)",
xaxis = list(visible = FALSE, range = c(-250, 250), fixedrange=TRUE),
yaxis = list(visible = FALSE, range = c(0, 480), fixedrange=TRUE),
showlegend = FALSE,
plot_bgcolor = "#f0f0f0",
updatemenus = list(list(y = 1.1, x = 0.1, buttons = visibility_list))
)
fig